Data Analysis

# Summary table of headline statistics computed over the unfiltered df_raw.

# Count each categorical column once; .get(..., 0) below keeps the table
# renderable when a category is absent instead of raising KeyError.
platform_counts = df_raw["platform"].value_counts()
active_counts = df_raw["project_active"].value_counts()


def _share_of_true_in_percent(column):
    """Return the share of True values in df_raw[column] in percent, rounded to 2 decimals."""
    shares = df_raw[column].value_counts(normalize=True)
    return round(shares.get(True, 0) * 100, 2)


# Each row pairs a label with its value so the two table columns cannot drift apart.
summary_rows = [
    ("Total number of projects", df_raw["project_name"].count()),
    ("Github projects", platform_counts.get("github", 0)),
    ("Gitlab projects", platform_counts.get("gitlab", 0)),
    ("Other platforms", platform_counts.get("custom", 0)),
    (
        "Number of projects in personal namespace",
        # count() skips missing values, so the difference is the number of
        # rows without an organization.
        df_raw["project_name"].count() - df_raw["organization"].count(),
    ),
    ("Total stars of all projects", df_raw["stargazers_count"].sum()),
    ("Total contributors of all projects", df_raw["contributors"].sum()),
    ("Active GitHub projects", active_counts.get(True, 0)),
    ("Inactive GitHub projects", active_counts.get(False, 0)),
    ("Projects with contribution guide in %", _share_of_true_in_percent("contribution_guide")),
    ("Projects with code of conduct in %", _share_of_true_in_percent("code_of_conduct")),
    ("Projects accepting donations in %", _share_of_true_in_percent("accepts_donations")),
    ("Median number of commits", df_raw["total_number_of_commits"].median()),
    ("Median stargazers", df_raw["stargazers_count"].median()),
    ("Median stars last year", df_raw["stars_last_year"].median()),
    (
        "Median Development Distribution Score",
        round(df_raw["development_distribution_score"].median(), 4),
    ),
    ("Median number of contributors", df_raw["contributors"].median()),
    ("Median closed issues last year", df_raw["issues_closed_last_year"].median()),
    ("Median commits last year", df_raw["total_commits_last_year"].median()),
    ("Median age in years", round(df_raw["project_age_in_years"].median(), 2)),
]

fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=["Dimension", "Value"],
                line_color="#000000",
                fill_color="#ffffff",
                font_size=18,
            ),
            cells=dict(
                fill_color="#ffffff",
                line_color="#ffffff",
                font_size=16,
                height=30,
                values=[
                    [label for label, _ in summary_rows],
                    [value for _, value in summary_rows],
                ],
            ),
        )
    ]
)

fig.update_layout(height=700, width=700)
fig.show()
### KK: this is where a clear object naming convention + comments would really help: is syntax df[df_raw[..]] appropriate here?
### KK: it might be helpful to plot boxplots for the below scores per category to better show their distribution, including median

# Sub-populations compared in the table below.
df_personal_projects = df_active[df_active["organization"].isna()]
df_organization_projects = df_active[df_active["organization"].notna()]
# Explicit comparison kept on purpose: rows with a missing flag are not selected.
df_inactive = df_raw[df_raw["project_active"] == False]  # noqa: E712
df_top_stargazers = df_active[df_active["stargazers_count"] > 100]

# (label, frame) pairs; the median DDS of each frame becomes one table row.
# The stargazer label now states the threshold that is actually applied (> 100).
dds_groups = [
    ("All projects", df_raw),
    ("Active projects in personal namespace", df_personal_projects),
    ("Active organisation projects", df_organization_projects),
    ("Active projects", df_active),
    ("Inactive projects", df_inactive),
    ("Active projects with more than 100 Stars", df_top_stargazers),
    ("Projects with most contributors", df_active.nlargest(50, "contributors")),
]

fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=["Group", "Median DDS"],
                line_color="#000000",
                fill_color="#ffffff",
                font_size=18,
            ),
            cells=dict(
                line_color="#ffffff",
                fill_color="#ffffff",
                font_size=16,
                height=30,
                values=[
                    [label for label, _ in dds_groups],
                    [
                        round(frame["development_distribution_score"].median(), 3)
                        for _, frame in dds_groups
                    ],
                ],
            ),
        )
    ]
)

fig.update_layout(width=700, height=450)

fig.show()
# Pie chart of license frequencies among the active projects.
# reset_index(name=...) names the count column explicitly, so the frame has the
# same columns regardless of how the installed pandas names value_counts() output.
license_his = (
    df_active["license"]
    .value_counts()
    .rename_axis("license_names")
    .reset_index(name="license")
)
fig = px.pie(
    license_his,
    values="license",
    names="license_names",
    color_discrete_sequence=color_discrete_sequence,
    hole=0.2,
)

fig.update_layout(title="Distribution of Licences", showlegend=False, font_size=16)
fig.update_traces(
    textposition="inside",
    textinfo="percent+label",
    marker=dict(line=dict(color="#000000", width=1)),
)
fig.show()
# alternative to the pie chart in cell 23
# main point: ~80% of all open source licences fall under 5 types
main_license_types = ['BSD-3-Clause', 'MIT', 'GPL-3.0', 'CUSTOM', 'Apache-2.0']
alt_df_active = df_active.copy()

# Keep the five main license types and pool everything else under "Other".
alt_df_active['pooled_license'] = np.where(
    alt_df_active['license'].isin(main_license_types),
    alt_df_active['license'],
    'Other',
)

# reset_index(name=...) names the count column explicitly, so the frame has the
# same columns regardless of how the installed pandas names value_counts() output.
alt_license_his = (
    alt_df_active["pooled_license"]
    .value_counts()
    .rename_axis("license_names")
    .reset_index(name="pooled_license")
)
alt_fig = px.pie(
    alt_license_his,
    values="pooled_license",
    names="license_names",
    color_discrete_sequence=color_discrete_sequence,
    hole=0.2,
)

alt_fig.update_layout(title="Distribution of Licenses", showlegend=False, font_size=16)
alt_fig.update_traces(
    textposition='inside',
    textinfo='percent+label',
    marker=dict(line=dict(color='#000000', width=1)),
)
alt_fig.show()
# Histogram (50 bins) of project_age_in_years over the active projects.
fig = (
    px.histogram(
        df_active,
        title="Distribution of Project Age in Years",
        x="project_age_in_years",
        nbins=50,
    )
    .update_layout(xaxis_title="Project Age", yaxis_title=None)
    .update_traces(marker_color=marker_color)
)
fig.show()
# Histogram (50 bins) of total_number_of_commits over the active projects.
fig = (
    px.histogram(
        df_active,
        title="Distribution of Total Commits",
        x="total_number_of_commits",
        nbins=50,
    )
    .update_layout(xaxis_title="Project Total Commits", yaxis_title="Projects")
    .update_traces(marker_color=marker_color)
)
fig.show()
# Horizontal bar chart: contributors summed per topic over the active projects.
# The per-topic project counts that used to be computed here were never read by
# this chart, and the "DDS" colour-bar title had no colour axis to attach to;
# both were dropped.
contributors_per_topic = (
    df_active.groupby("topic")["contributors"]
    .sum()
    .reset_index()
    .sort_values("contributors", ascending=False)
)

fig = px.bar(
    contributors_per_topic,
    x="contributors",
    y="topic",
    orientation="h",
)

fig.update_layout(
    height=1000,
    yaxis_title=None,
    xaxis_title="Contributors",
    title="Contributors within Topics",
    hoverlabel=dict(bgcolor="white"),
)
fig.update_traces(marker_color=marker_color)
fig.update(layout_showlegend=False)
# Horizontal bar chart: number of active projects per topic.
# reset_index(name=...) names the count column explicitly, so the frame has the
# same columns regardless of how the installed pandas names value_counts() output.
topic_his = (
    df_active["topic"]
    .value_counts()
    .rename_axis("topic_names")
    .reset_index(name="topic")
)

fig = px.bar(
    topic_his,
    x="topic",
    y="topic_names",
    orientation="h",
)

# The former "DDS" colour-bar title was dropped: the bars use a single
# marker colour, so there is no colour axis to label.
fig.update_layout(
    height=1000,
    yaxis_title=None,
    xaxis_title="Projects",
    title="Projects within Topics",
    hoverlabel=dict(bgcolor="white"),
)
fig.update_traces(marker_color=marker_color)
fig.update(layout_showlegend=False)
# Pie chart of dominating programming languages among the active projects.
# reset_index(name=...) names the count column explicitly, so the frame has the
# same columns regardless of how the installed pandas names value_counts() output.
license_dominating_language = (
    df_active["dominating_language"]
    .value_counts()
    .rename_axis("dominating_language_names")
    .reset_index(name="dominating_language")
)
# Bare expression kept from the notebook, where it displays the full frame.
license_dominating_language
# Only languages that dominate more than 4 projects get their own slice.
license_dominating_language = license_dominating_language[
    license_dominating_language["dominating_language"] > 4
]
fig = px.pie(
    license_dominating_language,
    values="dominating_language",
    names="dominating_language_names",
    color_discrete_sequence=color_discrete_sequence,
    hole=0.2,
)

fig.update_layout(title="Distribution of Programming Languages", showlegend=False, font_size=16)
fig.update_traces(
    textposition='inside',
    textinfo='percent+label',
    marker=dict(line=dict(color='#000000', width=1)),
)
fig.show()
# KK: I think the question that we should be asking: are there similar patterns followed by most topics? If so, what are they? If not, what are the fields that stand out and what is the difference?

# Bubble chart: one marker per (topic, dominating_language) pair, sized by the
# number of active projects with that combination.
# reset_index(name="counts") names the count column directly; renaming column 0
# afterwards only works when pandas leaves the value_counts() result unnamed.
df_language_distribution = df_active.value_counts(
    ["topic", "dominating_language"]
).reset_index(name="counts")

fig = px.scatter(
    df_language_distribution,
    x="dominating_language",
    y="topic",
    size="counts",
)

fig.update_layout(
    height=1000,
    width=1200,
    xaxis_title=None,
    yaxis_title=None,
)
fig.update_traces(marker_color=marker_color)

fig.show()
# KK: I think the question that we should be asking: are there similar patterns followed by most topics? If so, what are they? If not, what are the fields that stand out and what is the difference?

# Bubble chart: one marker per (topic, license) pair, sized by the number of
# active projects with that combination.
# reset_index(name="counts") names the count column directly; renaming column 0
# afterwards only works when pandas leaves the value_counts() result unnamed.
df_license_distribution = df_active.value_counts(
    ["topic", "license"]
).reset_index(name="counts")

fig = px.scatter(df_license_distribution, x="license", y="topic", size="counts")

fig.update_layout(
    height=1000,
    xaxis_title="License",
    yaxis_title=None,
    title=None,
    autosize=True,
)
fig.update_traces(marker_color=marker_color)

fig.show()
# Histogram (100 bins) of the contributors column over the active projects.
fig = (
    px.histogram(
        df_active,
        title=" Contributors",
        x="contributors",
        nbins=100,
    )
    .update_layout(xaxis_title="Contributors", yaxis_title="Projects")
    .update_traces(marker_color=marker_color)
)
fig.show()
# Table of git namespaces ordered by how many active projects they contain.
# reset_index(name="counts") names the count column explicitly, so the frame has
# the same columns regardless of how the installed pandas names value_counts() output.
most_listed_projects = (
    df_active["git_namespace"]
    .value_counts(ascending=False)
    .rename_axis("Namespace")
    .reset_index(name="counts")
)
fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=list(most_listed_projects.columns),
                line_color='#000000',
                fill_color='#ffffff',
                font_size=18,
            ),
            cells=dict(
                line_color='#ffffff',
                fill_color='#ffffff',
                font_size=16,
                height=30,
                values=[most_listed_projects["Namespace"], most_listed_projects["counts"]],
            ),
        )
    ]
)

fig.update_layout(autosize=False)

fig.show()
# The 40 active projects with the largest project_age_in_years, as a
# horizontal bar chart coloured by development distribution score.
oldest_projects = df_active.nlargest(40, "project_age_in_years")

# Hover text built from the three custom_data columns, one per line.
hover_template = (
    "Project Info: <b>%{customdata[0]}</b><br>"
    "Topic: <b>%{customdata[1]}</b><br>"
    "Git URL: <b>%{customdata[2]}</b>"
)

fig = px.bar(
    oldest_projects,
    orientation="h",
    x=oldest_projects["project_age_in_years"],
    y=oldest_projects["project_name"],
    color=oldest_projects["development_distribution_score"],
    color_continuous_scale=color_continuous_scale,
    custom_data=["oneliner", "topic", "git_url"],
    range_x=(9.6, 14),
)
fig.update_traces(hovertemplate=hover_template)
fig.update_layout(
    title="The oldest Projects still active",
    xaxis_title="Project Age in Years",
    yaxis_title=None,
    height=1000,
    coloraxis_colorbar={"title": "DDS"},
    hoverlabel={"bgcolor": "white"},
)

fig.update(layout_showlegend=False)
# The 40 active projects with the most contributors, as a horizontal bar
# chart coloured by development distribution score.
contributors = df_active.nlargest(40, "contributors")

# Hover text built from the three custom_data columns, one per line.
hover_template = (
    "Project Info: <b>%{customdata[0]}</b><br>"
    "Topic: <b>%{customdata[1]}</b><br>"
    "Git URL: <b>%{customdata[2]}</b>"
)

fig = px.bar(
    contributors,
    orientation="h",
    x=contributors["contributors"],
    y=contributors["project_name"],
    color=contributors["development_distribution_score"],
    color_continuous_scale=color_continuous_scale,
    custom_data=["oneliner", "topic", "git_url"],
    title="Projects with most contributors",
)
fig.update_traces(hovertemplate=hover_template)
# The layout title set here replaces the one passed to px.bar above.
fig.update_layout(
    title="Projects with the most contributors",
    xaxis_title="Contributors",
    yaxis_title=None,
    height=1200,
    coloraxis_colorbar={"title": "DDS"},
    hoverlabel={"bgcolor": "white"},
)

fig.update(layout_showlegend=False)
# The 40 active projects with the highest stargazers_count, as a horizontal
# bar chart coloured by development distribution score.
top_stargazers = df_active.nlargest(40, "stargazers_count")

# Hover text built from the three custom_data columns, one per line.
hover_template = (
    "Project Info: <b>%{customdata[0]}</b><br>"
    "Topic: <b>%{customdata[1]}</b><br>"
    "Git URL: <b>%{customdata[2]}</b>"
)

fig = px.bar(
    top_stargazers,
    orientation="h",
    x=top_stargazers["stargazers_count"],
    y=top_stargazers["project_name"],
    color=top_stargazers["development_distribution_score"],
    color_continuous_scale=color_continuous_scale,
    custom_data=["oneliner", "topic", "git_url"],
)
fig.update_traces(hovertemplate=hover_template)
fig.update_layout(
    title="Projects with the most Stars",
    xaxis_title="Stars",
    yaxis_title=None,
    height=1000,
    coloraxis_colorbar={"title": "DDS"},
    hoverlabel={"bgcolor": "white"},
)

fig.update(layout_showlegend=False)
# Star growth = stars gained last year relative to the current star total,
# computed only for active projects with more than 100 stars.
more_than_100_stars = df_active["stargazers_count"] > 100
df_top_100_stargazers = df_active[more_than_100_stars].copy()
df_top_100_stargazers["star_growth"] = df_top_100_stargazers["stars_last_year"].div(
    df_top_100_stargazers["stargazers_count"]
)

df_top_40_star_growth = df_top_100_stargazers.nlargest(40, "star_growth")

# Hover text built from the three custom_data columns, one per line.
hover_template = (
    "Project Info: <b>%{customdata[0]}</b><br>"
    "Topic: <b>%{customdata[1]}</b><br>"
    "Git URL: <b>%{customdata[2]}</b>"
)

fig = px.bar(
    df_top_40_star_growth,
    orientation="h",
    # The ratio is scaled to percent for display.
    x=df_top_40_star_growth["star_growth"] * 100,
    y=df_top_40_star_growth["project_name"],
    color=df_top_40_star_growth["development_distribution_score"],
    color_continuous_scale=color_continuous_scale,
    custom_data=["oneliner", "topic", "git_url"],
)

fig.update_layout(
    title="Projects with the highest Star Growth",
    xaxis_title="Star Growth last Year [%]",
    yaxis_title=None,
    height=1000,
    coloraxis_colorbar={"title": "DDS"},
    hoverlabel={"bgcolor": "white"},
)

fig.update_traces(hovertemplate=hover_template)
# The active projects with the most commits in the last year, as a horizontal
# bar chart coloured by development distribution score.
df_top_40_growth = df_active.nlargest(40, "total_commits_last_year")
# "ElexonDataPortal" is excluded from the chart after the top-40 selection,
# so fewer than 40 bars may remain.
df_top_40_growth = df_top_40_growth[df_top_40_growth["project_name"] != "ElexonDataPortal"]
fig = px.bar(
    df_top_40_growth,
    x=df_top_40_growth["total_commits_last_year"],
    y=df_top_40_growth["project_name"],
    orientation="h",
    color=df_top_40_growth["development_distribution_score"],
    custom_data=["oneliner", "topic", "git_url"],
    color_continuous_scale=color_continuous_scale,
)

# The x axis shows the absolute number of commits in the last year, not a
# percentage growth rate, so the labels name that quantity.
fig.update_layout(
    height=1000,
    xaxis_title="Commits last Year",
    yaxis_title=None,
    title="Projects with the most Commits last Year",
    coloraxis_colorbar=dict(title="DDS"),
    hoverlabel=dict(bgcolor="white"),
)

fig.update_traces(
    hovertemplate="<br>".join([
        "Project Info: <b>%{customdata[0]}</b>",
        "Topic: <b>%{customdata[1]}</b>",
        "Git URL: <b>%{customdata[2]}</b>",
    ])
)
# The 40 active projects with the highest total_score, as a horizontal bar
# chart coloured by development distribution score.
df_total_score = df_active.nlargest(40, "total_score")

# Hover text built from the three custom_data columns, one per line.
hover_template = (
    "Project Info: <b>%{customdata[0]}</b><br>"
    "Topic: <b>%{customdata[1]}</b><br>"
    "Git URL: <b>%{customdata[2]}</b>"
)

fig = px.bar(
    df_total_score,
    orientation="h",
    x=df_total_score["total_score"],
    y=df_total_score["project_name"],
    color=df_total_score["development_distribution_score"],
    color_continuous_scale=color_continuous_scale,
    custom_data=["oneliner", "topic", "git_url"],
    range_x=(0.85, 1),
)

fig.update_layout(
    title="Top Total Score",
    xaxis_title="Total Score",
    yaxis_title=None,
    height=1000,
    coloraxis_colorbar={"title": "DDS"},
    hoverlabel={"bgcolor": "white"},
)
fig.update(layout_showlegend=False)

fig.update_traces(hovertemplate=hover_template)
# The 40 active projects with the highest activity value, as a horizontal bar
# chart coloured by development distribution score.
df_activity_score = df_active.nlargest(40, "activity")

# Hover text built from the three custom_data columns, one per line.
hover_template = (
    "Project Info: <b>%{customdata[0]}</b><br>"
    "Topic: <b>%{customdata[1]}</b><br>"
    "Git URL: <b>%{customdata[2]}</b>"
)

fig = px.bar(
    df_activity_score,
    orientation="h",
    x=df_activity_score["activity"],
    y=df_activity_score["project_name"],
    color=df_activity_score["development_distribution_score"],
    color_continuous_scale=color_continuous_scale,
    custom_data=["oneliner", "topic", "git_url"],
    range_x=(2.9, 3.2),
)

fig.update_layout(
    title="Projects with the highest Activity Score",
    xaxis_title="Activity Score",
    yaxis_title=None,
    height=1000,
    coloraxis_colorbar={"title": "DDS"},
    hoverlabel={"bgcolor": "white"},
)
fig.update(layout_showlegend=False)

fig.update_traces(hovertemplate=hover_template)
# The 40 active projects with the highest size value, as a horizontal bar
# chart coloured by development distribution score.
df_size_score = df_active.nlargest(40, "size")

# Hover text built from the three custom_data columns, one per line.
hover_template = (
    "Project Info: <b>%{customdata[0]}</b><br>"
    "Topic: <b>%{customdata[1]}</b><br>"
    "Git URL: <b>%{customdata[2]}</b>"
)

fig = px.bar(
    df_size_score,
    orientation="h",
    x=df_size_score["size"],
    y=df_size_score["project_name"],
    color=df_size_score["development_distribution_score"],
    color_continuous_scale=color_continuous_scale,
    custom_data=["oneliner", "topic", "git_url"],
    range_x=(3.75, 4),
)

fig.update_layout(
    title="Projects with the highest Size Score",
    xaxis_title="Size Score",
    yaxis_title=None,
    height=1000,
    coloraxis_colorbar={"title": "DDS"},
    hoverlabel={"bgcolor": "white"},
)
fig.update(layout_showlegend=False)

fig.update_traces(hovertemplate=hover_template)
# KK: I'd suggest selecting a few most interesting examples conveying a message and put plots with raw data in the Appendix

# Bubble chart of active projects younger than max_age_in_years: age on x,
# topic on y, marker size from the "size" column, colour from total_score.
fig = px.scatter(
    df_active.query("project_age_in_years<@max_age_in_years"),
    x="project_age_in_years",
    y="topic",
    size="size",
    color="total_score",
    custom_data=["project_name", "oneliner", "topic", "git_url"],
    size_max=20,
)

fig.update_layout(
    coloraxis_colorbar=dict(title="Total Score"),
    height=1000,
    xaxis_title="Project Age in Years",
    yaxis_title=None,
    title="Total Score of Projects",
    hoverlabel=dict(bgcolor="white"),
)

# One hover line per custom_data column. The first entry needs its own closing
# </b> and trailing comma; without the comma Python concatenates it with the
# next string literal and both fields end up on one line.
fig.update_traces(
    hovertemplate="<br>".join([
        "Project Name: <b>%{customdata[0]}</b>",
        "Project Info: <b>%{customdata[1]}</b>",
        "Topic: <b>%{customdata[2]}</b>",
        "Git URL: <b>%{customdata[3]}</b>",
    ])
)

fig.show()
# KK: I'd suggest selecting a few most interesting examples conveying a message and put plots with raw data in the Appendix

# Bubble chart of organisation projects younger than max_age_in_years: age on
# x, topic on y, marker size from the "size" column, colour from the
# development distribution score.
fig = px.scatter(
    df_organization_projects.query("project_age_in_years<@max_age_in_years"),
    x="project_age_in_years",
    y="topic",
    size="size",
    color="development_distribution_score",
    custom_data=["project_name", "oneliner", "topic", "git_url"],
    size_max=20,
)

fig.update_layout(
    coloraxis_colorbar=dict(title="DDS"),
    yaxis_title=None,
    xaxis_title="Project Age in Years",
    height=1000,
    title="Development Distribution Score",
    hoverlabel=dict(bgcolor="white"),
)

# One hover line per custom_data column. The first entry needs its own closing
# </b> and trailing comma; without the comma Python concatenates it with the
# next string literal and both fields end up on one line.
fig.update_traces(
    hovertemplate="<br>".join([
        "Project Name: <b>%{customdata[0]}</b>",
        "Project Info: <b>%{customdata[1]}</b>",
        "Topic: <b>%{customdata[2]}</b>",
        "Git URL: <b>%{customdata[3]}</b>",
    ])
)

fig.show()
# The 40 personal-namespace projects with the highest stargazers_count,
# labelled by git namespace and coloured by development distribution score.
personal_stargazers = df_personal_projects.nlargest(40, "stargazers_count")

fig = px.bar(
    personal_stargazers,
    orientation="h",
    x=personal_stargazers["stargazers_count"],
    y=personal_stargazers["git_namespace"],
    color=personal_stargazers["development_distribution_score"],
    color_continuous_scale=color_continuous_scale,
    custom_data=["oneliner", "topic", "git_url"],
)

fig.update_layout(
    title="Projects with most Stars in User Namespace",
    xaxis_title="Stars",
    yaxis_title=None,
    height=1000,
    coloraxis_colorbar={"title": "DDS"},
    hoverlabel={"bgcolor": "white"},
)

fig.update(layout_showlegend=False)
# KK: can topics be grouped in fewer categories? can DDS be bucketed into categories, e.g. 0.3>=, 0.3<=&<=0.6, 0.6>=? Do we need to show all three variables, projects, DDS and dependents?

# count_strings is applied to every dependents_repos entry; presumably it
# returns the number of dependent repositories — confirm against its definition.
df_active["dependents_count"] = df_active["dependents_repos"].apply(count_strings)

# Top 50 by dependents; "Mission Support System" is dropped after the selection.
most_dependent_projects = df_active.nlargest(50, "dependents_count")
is_excluded = most_dependent_projects["project_name"] == "Mission Support System"
most_dependent_projects = most_dependent_projects[~is_excluded]

median_dds = most_dependent_projects["development_distribution_score"].median()
print("DDS of most used Python project:", round(median_dds, 3))

# Hover text built from the three custom_data columns, one per line.
hover_template = (
    "Project Info: <b>%{customdata[0]}</b><br>"
    "Topic: <b>%{customdata[1]}</b><br>"
    "Git URL: <b>%{customdata[2]}</b>"
)

fig = px.bar(
    most_dependent_projects,
    orientation="h",
    x=most_dependent_projects["dependents_count"],
    y=most_dependent_projects["project_name"],
    color=most_dependent_projects["development_distribution_score"],
    color_continuous_scale=color_continuous_scale,
    custom_data=["oneliner", "topic", "git_url"],
)

fig.update_layout(
    title="Most used Python Projects vs. DDS",
    xaxis_title="Dependents",
    yaxis_title=None,
    height=1000,
    coloraxis_colorbar={"title": "DDS"},
    hoverlabel={"bgcolor": "white"},
)

fig.update_traces(hovertemplate=hover_template)
DDS of most used Python project: 0.42

Process the organisations

# Load the organisations and derive country / continent codes from the
# free-text location_country column via the project's conversion helpers
# (name_to_iso3, alpha3_to_alpha2, alpha2_to_continent).
df_organizations = pd.read_csv("./csv/github_organizations.csv")
df_organizations["ISO_3"] = df_organizations["location_country"].apply(name_to_iso3)
df_organizations["ISO_3_alpha2"] = df_organizations["ISO_3"].apply(alpha3_to_alpha2)
df_organizations["continent"] = df_organizations["ISO_3_alpha2"].apply(alpha2_to_continent)

# to_frame(name=...) names the count column explicitly, so the frame has the
# same column regardless of how the installed pandas names value_counts() output.
continent_his = (
    df_organizations["continent"]
    .value_counts()
    .to_frame(name="continent")
    .rename_axis("continent_name")
)
# Replace the continent codes with readable names; an empty code is shown as "Global".
continent_his.rename(
    index={
        "EU": "Europe",
        "NA": "North America",
        "": "Global",
        "OC": "Oceania",
        "AS": "Asia",
        "SA": "South America",
        "AF": "Africa",
    },
    inplace=True,
)

print(continent_his)
fig = px.pie(
    continent_his.reset_index(),
    values="continent",
    names="continent_name",
    color_discrete_sequence=color_discrete_sequence,
    hole=0.2,
)

fig.update_layout(
    title="Distribution of Organizations between Continents",
    font_size=16,
    showlegend=False,
    hovermode=False,
)
fig.update_traces(
    textposition='outside',
    textinfo='label+percent',
    marker=dict(line=dict(color='#000000', width=2)),
)
fig.show()
                continent
continent_name           
Europe                208
North America         199
Global                180
Oceania                19
Asia                   12
South America           6
Africa                  4
# alternative to plot in cell 52

# Same chart as above, but the four smallest continents are pooled into "Other".
alt_df_organizations = df_organizations.copy()
vals_to_replace = {
    "EU": "Europe",
    "NA": "North America",
    "": "Global",
    "OC": "Other",
    "AS": "Other",
    "SA": "Other",
    "AF": "Other",
}
# Series.map turns codes missing from the dict into NaN, which value_counts() skips.
alt_df_organizations['continent'] = alt_df_organizations['continent'].map(vals_to_replace)

# to_frame(name=...) names the count column explicitly, so the frame has the
# same column regardless of how the installed pandas names value_counts() output.
alt_continent_his = (
    alt_df_organizations["continent"]
    .value_counts()
    .to_frame(name="continent")
    .rename_axis("continent_name")
)

alt_fig = px.pie(
    alt_continent_his.reset_index(),
    values="continent",
    names="continent_name",
    color_discrete_sequence=color_discrete_sequence,
    hole=0.2,
)

alt_fig.update_layout(
    title="Distribution of Organizations between Continents",
    font_size=16,
    showlegend=False,
    hovermode=False,
)
alt_fig.update_traces(
    textposition='outside',
    textinfo='label+percent',
    marker=dict(line=dict(color='#000000', width=2)),
)
alt_fig.show()
# similar pooling to the one in cell 53 could be done here for Africa + Oceania

# Pie chart over df_users_continent_cotoverse: slice sizes come from column 0,
# slice names from the "index" column.
outlined_marker = {"line": {"color": '#000000', "width": 2}}

fig = px.pie(
    df_users_continent_cotoverse,
    names="index",
    values=0,
    hole=0.2,
    color_discrete_sequence=color_discrete_sequence,
)
fig.update_traces(textinfo='label+percent', textposition='outside', marker=outlined_marker)
fig.update_layout(
    title="Distribution of Users between Continents",
    showlegend=False,
    hovermode=False,
    font_size=16,
)
fig.show()
# Pie chart of how often each form_of_organization occurs.
# reset_index(name=...) names the count column explicitly, so the frame has the
# same columns regardless of how the installed pandas names value_counts() output.
organization_his = (
    df_organizations["form_of_organization"]
    .value_counts()
    .rename_axis("organization")
    .reset_index(name="form_of_organization")
)

# upper_string is a project helper applied to every label; presumably it
# adjusts the capitalisation for display — confirm against its definition.
organization_his["organization"] = organization_his["organization"].apply(upper_string)
fig = px.pie(
    organization_his,
    values="form_of_organization",
    names="organization",
    color_discrete_sequence=color_discrete_sequence,
    hole=0.2,
)

fig.update_layout(
    title="Distribution of Organizational Forms",
    font_size=16,
    showlegend=False,
    hovermode=False,
)
fig.update_traces(
    textposition='outside',
    textinfo='percent+label',
    marker=dict(line=dict(color='#000000', width=2)),
)
fig.show()
# World map coloured by the number of organisations per ISO-3 country code.
# reset_index(name="counts") names the count column directly; renaming an
# "ISO_3" column afterwards only works when pandas names the value_counts()
# result after the source column.
df_countries = (
    df_organizations["ISO_3"]
    .value_counts()
    .rename_axis("country")
    .reset_index(name="counts")
)

fig = px.choropleth(
    df_countries,
    locations="country",
    locationmode="ISO-3",
    color="counts",
    color_continuous_scale=color_continuous_scale,
)

fig.update_layout(
    title="Global Distribution of Organisation",
    coloraxis_colorbar=dict(title="Organisations"),
)

fig.show()